# GEO series accessions under evaluation. The order matters: later code refers
# to datasets by position (e.g. geo_list[[5]] is GSE46474).
geo_name_list <- c(
  "GSE129166", "GSE34748", "GSE51675", "GSE15296", "GSE46474", "GSE50084"
)

# Read each series matrix file from the local Data/ directory.
# The paths are derived from geo_name_list so the accession vector is the
# single source of truth, and the list is named by accession so an element can
# be looked up by name as well as by position.
geo_list <- lapply(
  file.path("Data", paste0(geo_name_list, "_series_matrix.txt.gz")),
  function(path) getGEO(filename = path)
)
names(geo_list) <- geo_name_list
Using locally cached version of GPL570 found here:
C:\Users\lmcca\AppData\Local\Temp\RtmpYzuDYu/GPL570.soft.gz 
Using locally cached version of GPL570 found here:
C:\Users\lmcca\AppData\Local\Temp\RtmpYzuDYu/GPL570.soft.gz 
Using locally cached version of GPL570 found here:
C:\Users\lmcca\AppData\Local\Temp\RtmpYzuDYu/GPL570.soft.gz 
# For every dataset, show its feature data table and then the names of the
# feature data columns that contain at least one NA.
for (geo in geo_list) {
  feature_table <- fData(geo)
  print(feature_table)
  na_per_column <- colSums(is.na(feature_table))
  print(names(which(na_per_column > 0)))
}
character(0)
character(0)
[1] "GENE"    "TIGR_ID"
character(0)
character(0)
character(0)

All GSE datasets have a gene symbol column that can be used to match records, with two exceptions: GSE50084 (the last one), which lists the gene as the second element under gene_assignment, and GSE51675, which is missing all of its gene information.

# For every dataset, show its phenotype data table and then the names of the
# phenotype data columns that contain at least one NA.
for (geo in geo_list) {
  pheno_table <- pData(geo)
  print(pheno_table)
  na_per_column <- colSums(is.na(pheno_table))
  print(names(which(na_per_column > 0)))
}
character(0)
character(0)
character(0)
character(0)
character(0)
character(0)
# For every dataset, show the transposed expression matrix as a data frame and
# then the names of its columns that contain at least one NA.
for (geo in geo_list) {
  # Build the transposed data frame once and reuse it for both prints.
  expr_table <- data.frame(t(exprs(geo)))
  print(expr_table)
  na_per_column <- colSums(is.na(expr_table))
  print(names(which(na_per_column > 0)))
}
character(0)
character(0)
character(0)
character(0)
character(0)
character(0)
# Box plots of the first 100 features of each dataset, as a quick visual check
# of the scale of the expression values.
# Each plot is titled with its accession (geo_name_list is in the same order
# as geo_list) so the datasets can be told apart.
for (i in seq_along(geo_list)) {
  # Subset before transposing: head() keeps at most the first 100 rows of the
  # expression matrix, which avoids building a data frame of the full matrix
  # and does not fail when a dataset has fewer than 100 rows.
  df <- data.frame(t(head(exprs(geo_list[[i]]), 100)))
  boxplot(df, main = geo_name_list[i])
}

The plots for GSE51675 and GSE15296 show that their expression values have been transformed in some way. The other four datasets appear fairly well contained within an expression intensity range of 0-14.

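GSE51675 has been discarded due to its small sample size, missing information, and transformed expression values. GSE50084 has also been discarded (the reason is not recorded here; possibly because it lacks a dedicated gene symbol column - to be confirmed).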

# Contains "GSE129166", "GSE34748"
# Gene symbol overlap between the two datasets.
# The column is extracted as a vector with [[ ]]: the one-column data frames
# returned by [ ] are not compared symbol by symbol by every union() that may
# be attached, so the previous length did not reliably measure overlap.
symbols_a <- unique(as.character(fData(geo_list[[1]])[["Gene Symbol"]]))
symbols_b <- unique(as.character(fData(geo_list[[2]])[["Gene Symbol"]]))
# Overlap is perfect only when `shared` equals `total`.
c(
  shared = length(intersect(symbols_a, symbols_b)),
  total = length(union(symbols_a, symbols_b))
)
[1] 54675
# Contains "GSE129166", "GSE46474"
# Gene symbol overlap between the two datasets.
# The column is extracted as a vector with [[ ]]: the one-column data frames
# returned by [ ] are not compared symbol by symbol by every union() that may
# be attached, so the previous length did not reliably measure overlap.
symbols_a <- unique(as.character(fData(geo_list[[1]])[["Gene Symbol"]]))
symbols_b <- unique(as.character(fData(geo_list[[5]])[["Gene Symbol"]]))
# Overlap is perfect only when `shared` equals `total`.
c(
  shared = length(intersect(symbols_a, symbols_b)),
  total = length(union(symbols_a, symbols_b))
)
[1] 54675

All three have perfect overlap, so the datasets recommended for use are “GSE129166”, “GSE34748”, and “GSE46474”.

# Contains "GSE129166", "GSE15296"
# Gene symbol overlap between the two datasets.
# The column is extracted as a vector with [[ ]]: the one-column data frames
# returned by [ ] are not compared symbol by symbol by every union() that may
# be attached, so the previous length did not reliably measure overlap.
symbols_a <- unique(as.character(fData(geo_list[[1]])[["Gene Symbol"]]))
symbols_b <- unique(as.character(fData(geo_list[[4]])[["Gene Symbol"]]))
# Overlap is perfect only when `shared` equals `total`.
c(
  shared = length(intersect(symbols_a, symbols_b)),
  total = length(union(symbols_a, symbols_b))
)
[1] 54675
LS0tDQp0aXRsZTogIkxpYW0gUmVzZWFyY2ggMSINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNCmBgYHtyLCBpbmNsdWRlPUZBTFNFfQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpsaWJyYXJ5KHR1bmVSKQ0KbGlicmFyeShkZXZ0b29scykNCmxpYnJhcnkoZ2dwbG90MikNCmxpYnJhcnkodHNmZWF0dXJlcykNCmxpYnJhcnkoY2xhc3MpDQpsaWJyYXJ5KGN2VG9vbHMpDQpsaWJyYXJ5KHJhbmRvbUZvcmVzdCkNCmxpYnJhcnkoR0VPcXVlcnkpIA0KbGlicmFyeShSLnV0aWxzKQ0KbGlicmFyeShyZXNoYXBlMikNCmxpYnJhcnkobGltbWEpDQpsaWJyYXJ5KGRwbHlyKQ0KbGlicmFyeShlMTA3MSkNCmxpYnJhcnkoRFQpDQpsaWJyYXJ5KHZpcmlkaXMpDQpsaWJyYXJ5KHBsb3RseSkNCmxpYnJhcnkoc2NhbGVzKQ0KYGBgDQoNCmBgYHtyfQ0KZ2VvX25hbWVfbGlzdCA9IGMoIkdTRTEyOTE2NiIsICJHU0UzNDc0OCIsICJHU0U1MTY3NSIsICJHU0UxNTI5NiIsICJHU0U0NjQ3NCIsICJHU0U1MDA4NCIpDQpnZW9fbGlzdCA9IGMoZ2V0R0VPKGZpbGVuYW1lPSJEYXRhL0dTRTEyOTE2Nl9zZXJpZXNfbWF0cml4LnR4dC5neiIpLCBnZXRHRU8oZmlsZW5hbWU9IkRhdGEvR1NFMzQ3NDhfc2VyaWVzX21hdHJpeC50eHQuZ3oiKSwgZ2V0R0VPKGZpbGVuYW1lPSJEYXRhL0dTRTUxNjc1X3Nlcmllc19tYXRyaXgudHh0Lmd6IiksIGdldEdFTyhmaWxlbmFtZT0iRGF0YS9HU0UxNTI5Nl9zZXJpZXNfbWF0cml4LnR4dC5neiIpLCBnZXRHRU8oZmlsZW5hbWU9IkRhdGEvR1NFNDY0NzRfc2VyaWVzX21hdHJpeC50eHQuZ3oiKSwgZ2V0R0VPKGZpbGVuYW1lPSJEYXRhL0dTRTUwMDg0X3Nlcmllc19tYXRyaXgudHh0Lmd6IikpDQpgYGANCg0KYGBge3J9DQpmb3IoZ2VvIGluIGdlb19saXN0KSB7DQogIHByaW50KGZEYXRhKGdlbykpDQogIHByaW50KG5hbWVzKHdoaWNoKGNvbFN1bXMoaXMubmEoZkRhdGEoZ2VvKSkpPjApKSkNCn0NCmBgYA0KQWxsIGdzZSBoYXZlIGEgZ2VuZSBzeW1ib2wgY29sdW1uIHdoaWNoIGNhbiBiZSB1c2VkIHRvIG1hdGNoIHJlY29yZHMgZXhjZXB0IGZvciB0aGUgbGFzdCBvbmUsIEdTRTUwMDg0IHdoaWNoIGhhcyB0aGUgZ2VuZSBhcyB0aGUgMm5kIGxpc3RlZCBlbGVtZW50IHVuZGVyIGdlbmVfYXNzaWdubWVudCwgYW5kIEdTRTUxNjc1IHdoaWNoIGlzIG1pc3NpbmcgYWxsIG9mIGl0cyBnZW5lIGluZm8uIA0KDQoNCmBgYHtyfQ0KZm9yKGdlbyBpbiBnZW9fbGlzdCkgew0KICBwcmludChwRGF0YShnZW8pKQ0KICBwcmludChuYW1lcyh3aGljaChjb2xTdW1zKGlzLm5hKHBEYXRhKGdlbykpKT4wKSkpDQp9DQpgYGANCg0KYGBge3J9DQpmb3IgKGdlbyBpbiBnZW9fbGlzdCkgew0KICBwcmludChkYXRhLmZyYW1lKHQoZXhwcnMoZ2VvKSkpKQ0KICBwcmludChuYW1lcyh3aGljaChjb2xTdW1zKGlzLm5hKGRhdGEuZnJhbWUodChleHBycyhnZW8pKSkpKT4wKSkpDQp9DQpgYGANCg0KYGBge3J9DQojIFRPRE86IEZpbmQgdGhlIG91dGxp
ZXIgZ2VuZXMgYW5kIGNvbXBhcmUNCmZvciAoZ2VvIGluIGdlb19saXN0KSB7DQogIGRmID0gZGF0YS5mcmFtZSh0KGV4cHJzKGdlbykpKQ0KICBib3hwbG90KGRmWzE6MTAwXSkNCn0NCmBgYA0KUGxvdHMgZm9yIEdTRTUxNjc1IGFuZCBHU0UxNTI5NiBzaG93IHRoYXQgdGhleSBoYXZlIGJlZW4gdHJhbnNmb3JtZWQgc29tZWhvdy4gT3RoZXIgdGhhbiB0aG9zZSwgdGhlIG90aGVyIDQgc2VlbSBmYWlybHkgY29udGFpbmVkIGJldHdlZW4gMC0xNCBleHByZXNzaW9uIGludGVuc2l0eS4NCg0KR1NFNTE2NzUgaGFzIGJlZW4gZGlzY2FyZGVkIGR1ZSB0byBzbWFsbCBzYW1wbGUgc2l6ZSwgbWlzc2luZyBpbmZvLCBhbmQgdHJhbnNmb3JtZWQgZXhwcmVzc2lvbiB2YWx1ZXMuIEdTRTUwMDg0IGhhcyBiZWVuIGRpc2NhcmRlZA0KDQoNCg0KYGBge3J9DQojIENvbnRhaW5zICJHU0UxMjkxNjYiLCAiR1NFMzQ3NDgiDQpsZW5ndGgodW5pb24oZkRhdGEoZ2VvX2xpc3RbWzFdXSlbIkdlbmUgU3ltYm9sIl0sIGZEYXRhKGdlb19saXN0W1syXV0pWyJHZW5lIFN5bWJvbCJdKVtbMV1dKQ0KYGBgDQoNCmBgYHtyfQ0KIyBDb250YWlucyAiR1NFMTI5MTY2IiwgIkdTRTQ2NDc0Ig0KbGVuZ3RoKHVuaW9uKGZEYXRhKGdlb19saXN0W1sxXV0pWyJHZW5lIFN5bWJvbCJdLCBmRGF0YShnZW9fbGlzdFtbNV1dKVsiR2VuZSBTeW1ib2wiXSlbWzFdXSkNCmBgYA0KDQpBbGwgMyBoYXZlIHBlcmZlY3Qgb3ZlcmxhcCBzbyB0aGUgZGF0YXNldHMgcmVjb21tZW5kZWQgZm9yIHVzZSBhcmUgIkdTRTEyOTE2NiIsICJHU0UzNDc0OCIsIGFuZCAiR1NFNDY0NzQiDQoNCmBgYHtyfQ0KbGVuZ3RoKHVuaW9uKGZEYXRhKGdlb19saXN0W1sxXV0pWyJHZW5lIFN5bWJvbCJdLCBmRGF0YShnZW9fbGlzdFtbNF1dKVsiR2VuZSBTeW1ib2wiXSlbWzFdXSkNCmBgYA0KDQo=